import geopandas as gpd
import pandas as pdcarjackings_path = "data/Chi_Carjackings/Chicago_Carjackings.shp"
community_areas_path = "data/Chi-CCA/Chicago_2020.shp"
carjacking_variables_path = "data/Chi_Carjackings/carjacking_variables.xlsx"# 加载劫车事件点数据(点要素)
gdf_carjackings = gpd.read_file(carjackings_path)
# 加载芝加哥社区区块数据(面要素)
gdf_community_areas = gpd.read_file(community_areas_path)df_carjacking_vars = pd.read_excel(carjacking_variables_path)# 查看劫车数据结构
print(gdf_carjackings.head())
print(gdf_carjackings.info())
# 查看社区数据结构
print(gdf_community_areas.head())
print(gdf_community_areas.info())
# 查看变量表数据结构
print(df_carjacking_vars.head())
print(df_carjacking_vars.info()) ID Case _mber Date IUCR \
0 12243012.0 JD456735 12/11/2020 11:45:00 PM 326.0
1 12130750.0 JD326543 08/09/2020 08:30:00 PM 326.0
2 12012068.0 JD189121 03/18/2020 12:03:00 AM 326.0
3 12012705.0 JD189547 03/18/2020 01:00:00 PM 326.0
4 12013533.0 JD189766 03/18/2020 05:32:00 PM 325.0
Descr_tion Locat_tion Beat District Ward \
0 AGGRAVATED VEHICULAR HIJACKING STREET 723.0 7.0 6.0
1 AGGRAVATED VEHICULAR HIJACKING GAS STATION 1234.0 12.0 25.0
2 AGGRAVATED VEHICULAR HIJACKING STREET 1114.0 11.0 28.0
3 AGGRAVATED VEHICULAR HIJACKING STREET 1114.0 11.0 28.0
4 VEHICULAR HIJACKING STREET 712.0 7.0 16.0
Commu_Area FBI Code X Coo_nate Y Coo_nate Latitude Longitude \
0 68.0 3.0 1171154.0 1859576.0 41.770145 -87.648175
1 31.0 3.0 1160694.0 1889888.0 41.853547 -87.685680
2 26.0 3.0 1147266.0 1900617.0 41.883256 -87.734691
3 26.0 3.0 1148374.0 1900366.0 41.882546 -87.730629
4 68.0 3.0 1171683.0 1864278.0 41.783036 -87.646098
geometry
0 POINT (1171154.000 1859576.000)
1 POINT (1160694.000 1889888.000)
2 POINT (1147266.000 1900617.000)
3 POINT (1148374.000 1900366.000)
4 POINT (1171683.000 1864278.000)
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1412 entries, 0 to 1411
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 1412 non-null float64
1 Case _mber 1412 non-null object
2 Date 1412 non-null object
3 IUCR 1412 non-null float64
4 Descr_tion 1412 non-null object
5 Locat_tion 1412 non-null object
6 Beat 1412 non-null float64
7 District 1412 non-null float64
8 Ward 1412 non-null float64
9 Commu_Area 1412 non-null float64
10 FBI Code 1412 non-null float64
11 X Coo_nate 1412 non-null float64
12 Y Coo_nate 1412 non-null float64
13 Latitude 1412 non-null float64
14 Longitude 1412 non-null float64
15 geometry 1412 non-null geometry
dtypes: float64(11), geometry(1), object(4)
memory usage: 176.6+ KB
None
area_num_1 area_numbe community shape_area shape_len \
0 35.0 35.0 DOUGLAS 4.600462e+07 31027.054510
1 36.0 36.0 OAKLAND 1.691396e+07 19565.506153
2 37.0 37.0 FULLER PARK 1.991670e+07 25339.089750
3 38.0 38.0 GRAND BOULEVARD 4.849250e+07 28196.837157
4 39.0 39.0 KENWOOD 2.907174e+07 23325.167906
districtno district GEOID GEOG 2000_POP ... KOREAN \
0 7.0 South Side 35.0 Douglas 26470.0 ... 145.0
1 7.0 South Side 36.0 Oakland 6110.0 ... 0.0
2 7.0 South Side 37.0 Fuller Park 3420.0 ... 0.0
3 7.0 South Side 38.0 Grand Boulevard 28006.0 ... 0.0
4 7.0 South Side 39.0 Kenwood 18363.0 ... 124.0
OTHASIAN OTHER_EURO OTHUNSPEC 2000_WHITE 2000_HISP 2000_BLACK \
0 571.0 1290.0 555.0 1745.0 295.0 22635.0
1 22.0 98.0 342.0 40.0 58.0 5957.0
2 0.0 15.0 0.0 18.0 116.0 3225.0
3 0.0 258.0 83.0 173.0 236.0 27370.0
4 223.0 361.0 175.0 2915.0 301.0 13900.0
2000_ASIAN 2000_OTHER geometry
0 1390.0 405.0 POLYGON ((-87.60914 41.84469, -87.60915 41.844...
1 8.0 47.0 POLYGON ((-87.59215 41.81693, -87.59231 41.816...
2 6.0 55.0 POLYGON ((-87.62880 41.80189, -87.62879 41.801...
3 21.0 206.0 POLYGON ((-87.60671 41.81681, -87.60670 41.816...
4 785.0 462.0 POLYGON ((-87.59215 41.81693, -87.59215 41.816...
[5 rows x 217 columns]
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 77 entries, 0 to 76
Columns: 217 entries, area_num_1 to geometry
dtypes: float64(193), geometry(1), object(23)
memory usage: 130.7+ KB
None
Variable Description
0 ID Internal identifier
1 Case_mber Internal case number
2 Date date of incident
3 IUCR Illinois Uniform Crime Reporting code
4 Descr_tion incident description
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Variable 16 non-null object
1 Description 15 non-null object
dtypes: object(2)
memory usage: 404.0+ bytes
None
# 查看坐标参考系(CRS)
print("Carjackings CRS:", gdf_carjackings.crs)
print("Community Areas CRS:", gdf_community_areas.crs)
# 如果二者不一致,可以将其中一个数据集重投影到另一个的CRS,例如:
# gdf_carjackings = gdf_carjackings.to_crs(gdf_community_areas.crs)
# 查看缺失值情况
print("\nMissing values in carjackings data:")
print(gdf_carjackings.isnull().sum())
print("\nMissing values in community areas data:")
print(gdf_community_areas.isnull().sum())
# 检查几何对象类型
print("\nCarjackings geometry type:", gdf_carjackings.geom_type.unique())
print("Community Areas geometry type:", gdf_community_areas.geom_type.unique())
# 确保坐标存在且有效
valid_carjackings = gdf_carjackings[gdf_carjackings.geometry.notnull()]
print("\nNumber of valid carjacking points:", len(valid_carjackings))
print("Number of all carjacking points:", len(gdf_carjackings))
# 如果需要的话,还可以检查坐标范围,以确保数据大致在芝加哥范围内
print("\nCarjackings bounding box:", gdf_carjackings.total_bounds)
print("Community Areas bounding box:", gdf_community_areas.total_bounds)Carjackings CRS: EPSG:3435
Community Areas CRS: GEOGCS["unknown",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]
Missing values in carjackings data:
ID 0
Case _mber 0
Date 0
IUCR 0
Descr_tion 0
Locat_tion 0
Beat 0
District 0
Ward 0
Commu_Area 0
FBI Code 0
X Coo_nate 0
Y Coo_nate 0
Latitude 0
Longitude 0
geometry 0
dtype: int64
Missing values in community areas data:
area_num_1 0
area_numbe 0
community 0
shape_area 0
shape_len 0
..
2000_HISP 0
2000_BLACK 0
2000_ASIAN 0
2000_OTHER 0
geometry 0
Length: 217, dtype: int64
Carjackings geometry type: ['Point']
Community Areas geometry type: ['Polygon' 'MultiPolygon']
Number of valid carjacking points: 1412
Number of all carjacking points: 1412
Carjackings bounding box: [1099640. 1816728. 1203211. 1950354.]
Community Areas bounding box: [-87.94011408 41.64454312 -87.5241371 42.02303859]
# 将Community Areas数据重投影为EPSG:3435
gdf_community_areas = gdf_community_areas.to_crs(epsg=3435)
# 检查结果
print(gdf_community_areas.crs)EPSG:3435
# 使用sjoin进行空间连接
# how='left':保留carjackings中的所有点,from='carjackings'中返回其所属的community polygons
# op='within'或'op='intersects':由于点位于面内,所以可使用'within'匹配事件所在社区
gdf_carjackings_joined = gpd.sjoin(gdf_carjackings, gdf_community_areas, how='left', predicate='within')
# 检查结果
print(gdf_carjackings_joined.head())
print(gdf_carjackings_joined.crs) ID Case _mber Date IUCR \
0 12243012.0 JD456735 12/11/2020 11:45:00 PM 326.0
1 12130750.0 JD326543 08/09/2020 08:30:00 PM 326.0
2 12012068.0 JD189121 03/18/2020 12:03:00 AM 326.0
3 12012705.0 JD189547 03/18/2020 01:00:00 PM 326.0
4 12013533.0 JD189766 03/18/2020 05:32:00 PM 325.0
Descr_tion Locat_tion Beat District Ward \
0 AGGRAVATED VEHICULAR HIJACKING STREET 723.0 7.0 6.0
1 AGGRAVATED VEHICULAR HIJACKING GAS STATION 1234.0 12.0 25.0
2 AGGRAVATED VEHICULAR HIJACKING STREET 1114.0 11.0 28.0
3 AGGRAVATED VEHICULAR HIJACKING STREET 1114.0 11.0 28.0
4 VEHICULAR HIJACKING STREET 712.0 7.0 16.0
Commu_Area ... ARABIC KOREAN OTHASIAN OTHER_EURO OTHUNSPEC \
0 68.0 ... 0.0 0.0 1.0 76.0 170.0
1 31.0 ... 203.0 186.0 136.0 256.0 82.0
2 26.0 ... 0.0 0.0 0.0 60.0 11.0
3 26.0 ... 0.0 0.0 0.0 60.0 11.0
4 68.0 ... 0.0 0.0 1.0 76.0 170.0
2000_WHITE 2000_HISP 2000_BLACK 2000_ASIAN 2000_OTHER
0 178.0 347.0 39352.0 27.0 318.0
1 3587.0 39144.0 774.0 121.0 405.0
2 133.0 201.0 22564.0 18.0 103.0
3 133.0 201.0 22564.0 18.0 103.0
4 178.0 347.0 39352.0 27.0 318.0
[5 rows x 233 columns]
EPSG:3435
# 查看社区数据中的所有列名,找到中位收入字段名
print(gdf_community_areas.columns)
gdf_community_areas['MEDINC'].describe()
median_income = gdf_community_areas['MEDINC'].median()
print("Median income for all communities:", median_income)
gdf_community_areas['income_group'] = gdf_community_areas['MEDINC'].apply(lambda x: 'low' if x < median_income else 'high')
# 确保使用空间连接后的数据
# 按照社区唯一标识字段(例如area_numbe或area_num_1)对事件计数
# 注意替换'area_numbe'为实际标识社区的列名
event_counts = gdf_carjackings_joined.groupby('area_numbe').size().reset_index(name='carjackings_count')
# 将事件数合并回社区数据
gdf_community_areas = gdf_community_areas.merge(event_counts, on='area_numbe', how='left')
# 对于没有劫车事件的社区,填充为0
gdf_community_areas['carjackings_count'] = gdf_community_areas['carjackings_count'].fillna(0)
gdf_community_areas['area_sqm'] = gdf_community_areas.geometry.area
gdf_community_areas['carjackings_density'] = (gdf_community_areas['carjackings_count'] / (gdf_community_areas['area_sqm'] / 1_000_000))
print(gdf_community_areas[['area_numbe', 'MEDINC', 'income_group', 'carjackings_count', 'carjackings_density']].head())Index(['area_num_1', 'area_numbe', 'community', 'shape_area', 'shape_len',
'districtno', 'district', 'GEOID', 'GEOG', '2000_POP',
...
'KOREAN', 'OTHASIAN', 'OTHER_EURO', 'OTHUNSPEC', '2000_WHITE',
'2000_HISP', '2000_BLACK', '2000_ASIAN', '2000_OTHER', 'geometry'],
dtype='object', length=217)
Median income for all communities: 51689.0
area_numbe MEDINC income_group carjackings_count carjackings_density
0 35.0 31856.0 low 20.0 0.434738
1 36.0 32844.0 low 4.0 0.236491
2 37.0 23148.0 low 9.0 0.451881
3 38.0 33503.0 low 31.0 0.639273
4 39.0 49114.0 low 11.0 0.378374
import matplotlib.pyplot as plt
# 可视化:根据income_group着色绘制社区分区
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
gdf_community_areas.plot(column='income_group', categorical=True, legend=True, ax=ax)
ax.set_title('Chicago Communities by Income Group')
ax.axis('off')
plt.show()
# 可视化:根据carjackings_density绘制社区分区(连续值专题图)
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
gdf_community_areas.plot(column='carjackings_density', cmap='OrRd', legend=True, ax=ax)
ax.set_title('Carjackings Density (per sq. km)')
ax.axis('off')
plt.show()

import folium
import geopandas as gpd
gdf_community_areas = gdf_community_areas.to_crs(epsg=4326)
mean_lat = gdf_community_areas.geometry.centroid.y.mean()
mean_lon = gdf_community_areas.geometry.centroid.x.mean()
m = folium.Map(location=[mean_lat, mean_lon], zoom_start=10, tiles='cartodbpositron')
folium.Choropleth(
geo_data=gdf_community_areas,
data=gdf_community_areas,
columns=['community', 'carjackings_density'],
key_on='feature.properties.community',
fill_color='OrRd',
fill_opacity=0.7,
line_opacity=0.2,
legend_name='Carjackings Density (per sq. km)'
).add_to(m)
def style_function(feature):
income_grp = feature['properties']['income_group']
color = 'blue' if income_grp == 'high' else 'green'
return {
'fillOpacity': 0, # 不改变填充(底色已由choropleth确定)
'color': color,
'weight': 2
}
# 准备在GeoJson中显示的字段信息(弹出或鼠标悬停时显示)
# 例如显示 'community', 'MEDINC', 'carjackings_count', 'carjackings_density', 'income_group'
tooltip_fields = ['community', 'MEDINC', 'carjackings_count', 'carjackings_density', 'income_group']
tooltip_aliases = ['Community:', 'Median Income:', 'Carjacking Count:', 'Carjacking Density:', 'Income Group:']
# 使用GeoJson叠加社区边界线并添加tooltip
folium.GeoJson(
gdf_community_areas,
name='Income Group Boundaries',
style_function=style_function,
tooltip=folium.GeoJsonTooltip(
fields=tooltip_fields,
aliases=tooltip_aliases,
localize=True
)
).add_to(m)
mC:\Users\huang\AppData\Local\Temp\ipykernel_8420\2462306573.py:14: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
mean_lat = gdf_community_areas.geometry.centroid.y.mean()
C:\Users\huang\AppData\Local\Temp\ipykernel_8420\2462306573.py:15: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
mean_lon = gdf_community_areas.geometry.centroid.x.mean()
Make this Notebook Trusted to load map: File -> Trust Notebook
m.save('my_interactive_map.html')import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapely.ops import unary_union
from shapely.geometry import Point, Polygon
import pointpats
from pointpats import distance_statistics as ds
############################################
# 1. 加载数据
############################################
carjackings_path = "data/Chi_Carjackings/Chicago_Carjackings.shp"
community_areas_path = "data/Chi-CCA/Chicago_2020.shp"
carjacking_variables_path = "data/Chi_Carjackings/carjacking_variables.xlsx"
gdf_carjackings = gpd.read_file(carjackings_path)
gdf_community_areas = gpd.read_file(community_areas_path)
df_carjacking_vars = pd.read_excel(carjacking_variables_path)
############################################
# 2. 检查并统一CRS
############################################
print("Carjackings CRS:", gdf_carjackings.crs)
print("Community Areas CRS:", gdf_community_areas.crs)
# 如果CRS不一致,需要重投影。例如,劫车数据EPSG:3435,社区数据为地理坐标:
# 假设以EPSG:3435为统一坐标系:
if gdf_community_areas.crs != gdf_carjackings.crs:
gdf_community_areas = gdf_community_areas.to_crs(gdf_carjackings.crs)
print("After reprojecting, Community Areas CRS:", gdf_community_areas.crs)
############################################
# 3. 提取事件点坐标并确保为float类型
############################################
carjackings_points = np.array([(geom.x, geom.y) for geom in gdf_carjackings.geometry], dtype=float)
############################################
# 4. 构建研究区域多边形(study_area_poly)
############################################
study_area_poly = unary_union(gdf_community_areas.geometry)
if study_area_poly.geom_type == 'MultiPolygon':
# 选取面积最大的Polygon作为研究域
largest_polygon = max(study_area_poly.geoms, key=lambda a: a.area)
study_area_poly = largest_polygon
# 确保多边形坐标为float
# 提取Polygon坐标并重建Polygon以确保浮点类型
coords = np.array(study_area_poly.exterior.coords, dtype=float)
study_area_poly = Polygon(coords)
############################################
# 5. 定义距离序列与模拟次数,并确保为float类型
############################################
distance_support = np.arange(500, 5500, 500).astype(float)
iterations = 999
############################################
# 6. 使用k_test进行K函数分析
############################################
result = ds.k_test(
coordinates=carjackings_points,
support=distance_support,
hull=study_area_poly,
keep_simulations=True,
n_simulations=iterations
)
# 查看result属性
print("Attributes of result:", dir(result))
# 提取结果
obs = result.statistic # 观测K值数组
sims = result.simulations # 模拟K值数组 (iterations x len(distance_support))
r = result.support # 距离序列
# 计算95%置信区间
lower = np.percentile(sims, 2.5, axis=0)
upper = np.percentile(sims, 97.5, axis=0)
plt.figure(figsize=(10,6))
plt.plot(r, obs, color='red', label='Observed K')
plt.fill_between(r, lower, upper, color='gray', alpha=0.5, label='95% CI (Random Sim)')
plt.xlabel('Distance (m)')
plt.ylabel('K(d)')
plt.title("Ripley's K Function via k_test")
plt.legend()
plt.show()Carjackings CRS: EPSG:3435
Community Areas CRS: GEOGCS["unknown",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]
After reprojecting, Community Areas CRS: EPSG:3435
Attributes of result: ['__add__', '__class__', '__class_getitem__', '__contains__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getnewargs__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__match_args__', '__module__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '_asdict', '_field_defaults', '_fields', '_make', '_replace', 'count', 'index', 'pvalue', 'simulations', 'statistic', 'support']

# 确保 gdf_community_areas 中有 MEDINC 列
print("Columns in gdf_community_areas:", gdf_community_areas.columns)
# 检查MEDINC描述统计并计算中位数
print(gdf_community_areas['MEDINC'].describe())
median_income = gdf_community_areas['MEDINC'].median()
print("Median income for all communities:", median_income)
# 根据中位数划分income_group列
gdf_community_areas['income_group'] = gdf_community_areas['MEDINC'].apply(lambda x: 'low' if x < median_income else 'high')
# 再次确认
print("Check if income_group column is created:")
print(gdf_community_areas[['area_numbe','MEDINC','income_group']].head())Columns in gdf_community_areas: Index(['area_num_1', 'area_numbe', 'community', 'shape_area', 'shape_len',
'districtno', 'district', 'GEOID', 'GEOG', '2000_POP',
...
'KOREAN', 'OTHASIAN', 'OTHER_EURO', 'OTHUNSPEC', '2000_WHITE',
'2000_HISP', '2000_BLACK', '2000_ASIAN', '2000_OTHER', 'geometry'],
dtype='object', length=217)
count 77.000000
mean 56199.935065
std 27028.194034
min 15396.000000
25% 34396.000000
50% 51689.000000
75% 70223.000000
max 125033.000000
Name: MEDINC, dtype: float64
Median income for all communities: 51689.0
Check if income_group column is created:
area_numbe MEDINC income_group
0 35.0 31856.0 low
1 36.0 32844.0 low
2 37.0 23148.0 low
3 38.0 33503.0 low
4 39.0 49114.0 low
gdf_carjackings_joined['area_numbe'] = gdf_carjackings_joined['area_numbe'].fillna(-1)
low_income_areas = gdf_community_areas[gdf_community_areas['income_group'] == 'low']['area_numbe'].tolist()
high_income_areas = gdf_community_areas[gdf_community_areas['income_group'] == 'high']['area_numbe'].tolist()
low_income_events = gdf_carjackings_joined[gdf_carjackings_joined['area_numbe'].isin(low_income_areas)]
high_income_events = gdf_carjackings_joined[gdf_carjackings_joined['area_numbe'].isin(high_income_areas)]
print("Number of low-income community events:", len(low_income_events))
print("Number of high-income community events:", len(high_income_events))
print("\nLow income events head:")
print(low_income_events.head())
print("\nHigh income events head:")
print(high_income_events.head())Number of low-income community events: 964
Number of high-income community events: 446
Low income events head:
ID Case _mber Date IUCR \
0 12243012.0 JD456735 12/11/2020 11:45:00 PM 326.0
2 12012068.0 JD189121 03/18/2020 12:03:00 AM 326.0
3 12012705.0 JD189547 03/18/2020 01:00:00 PM 326.0
4 12013533.0 JD189766 03/18/2020 05:32:00 PM 325.0
5 12012624.0 JD189695 03/18/2020 01:00:00 PM 326.0
Descr_tion Locat_tion Beat District Ward \
0 AGGRAVATED VEHICULAR HIJACKING STREET 723.0 7.0 6.0
2 AGGRAVATED VEHICULAR HIJACKING STREET 1114.0 11.0 28.0
3 AGGRAVATED VEHICULAR HIJACKING STREET 1114.0 11.0 28.0
4 VEHICULAR HIJACKING STREET 712.0 7.0 16.0
5 AGGRAVATED VEHICULAR HIJACKING STREET 1513.0 15.0 29.0
Commu_Area ... ARABIC KOREAN OTHASIAN OTHER_EURO OTHUNSPEC \
0 68.0 ... 0.0 0.0 1.0 76.0 170.0
2 26.0 ... 0.0 0.0 0.0 60.0 11.0
3 26.0 ... 0.0 0.0 0.0 60.0 11.0
4 68.0 ... 0.0 0.0 1.0 76.0 170.0
5 25.0 ... 146.0 15.0 120.0 530.0 373.0
2000_WHITE 2000_HISP 2000_BLACK 2000_ASIAN 2000_OTHER
0 178.0 347.0 39352.0 27.0 318.0
2 133.0 201.0 22564.0 18.0 103.0
3 133.0 201.0 22564.0 18.0 103.0
4 178.0 347.0 39352.0 27.0 318.0
5 5662.0 4841.0 105369.0 642.0 1013.0
[5 rows x 233 columns]
High income events head:
ID Case _mber Date IUCR \
1 12130750.0 JD326543 08/09/2020 08:30:00 PM 326.0
6 12242966.0 JD456567 12/11/2020 07:30:00 PM 326.0
11 12130573.0 JD326320 08/09/2020 04:22:00 PM 326.0
12 12013044.0 JD190170 03/19/2020 07:50:00 AM 326.0
15 12013595.0 JD190763 03/19/2020 08:40:00 PM 325.0
Descr_tion Locat_tion Beat District Ward \
1 AGGRAVATED VEHICULAR HIJACKING GAS STATION 1234.0 12.0 25.0
6 AGGRAVATED VEHICULAR HIJACKING STREET 1225.0 12.0 28.0
11 AGGRAVATED VEHICULAR HIJACKING STREET 1821.0 18.0 2.0
12 AGGRAVATED VEHICULAR HIJACKING STREET 2222.0 22.0 21.0
15 VEHICULAR HIJACKING STREET 2234.0 22.0 34.0
Commu_Area ... ARABIC KOREAN OTHASIAN OTHER_EURO OTHUNSPEC \
1 31.0 ... 203.0 186.0 136.0 256.0 82.0
6 28.0 ... 421.0 512.0 1432.0 3563.0 386.0
11 8.0 ... 759.0 546.0 1477.0 5747.0 517.0
12 73.0 ... 1.0 0.0 26.0 298.0 10.0
15 75.0 ... 15.0 5.0 7.0 333.0 9.0
2000_WHITE 2000_HISP 2000_BLACK 2000_ASIAN 2000_OTHER
1 3587.0 39144.0 774.0 121.0 405.0
6 11731.0 4415.0 24546.0 4861.0 866.0
11 50397.0 2805.0 13884.0 4434.0 1291.0
12 193.0 231.0 29108.0 9.0 302.0
15 7510.0 533.0 16816.0 83.0 284.0
[5 rows x 233 columns]
import numpy as np
import matplotlib.pyplot as plt
from shapely.ops import unary_union
from shapely.geometry import Point, Polygon
from pointpats import distance_statistics as ds
# 首先从low_income_events和high_income_events中提取点坐标
low_points = np.array([(geom.x, geom.y) for geom in low_income_events.geometry], dtype=float)
high_points = np.array([(geom.x, geom.y) for geom in high_income_events.geometry], dtype=float)
# 使用与之前类似的距离序列和模拟次数
distance_support = np.arange(500, 5500, 500).astype(float)
iterations = 999
############################################
# 低收入社区事件 K函数分析
############################################
low_result = ds.k_test(
coordinates=low_points,
support=distance_support,
hull=study_area_poly, # 使用同一个研究区域,如有需要可只使用低收入社区的多边形集合
keep_simulations=True,
n_simulations=iterations
)
obs_low = low_result.statistic
sims_low = low_result.simulations
r_low = low_result.support
lower_low = np.percentile(sims_low, 2.5, axis=0)
upper_low = np.percentile(sims_low, 97.5, axis=0)
plt.figure(figsize=(10,6))
plt.plot(r_low, obs_low, color='red', label='Observed K (Low Income Areas)')
plt.fill_between(r_low, lower_low, upper_low, color='gray', alpha=0.5, label='95% CI (Random)')
plt.xlabel('Distance (m)')
plt.ylabel('K(d)')
plt.title("Ripley's K - Low Income Areas")
plt.legend()
plt.show()
############################################
# 高收入社区事件 K函数分析
############################################
high_result = ds.k_test(
coordinates=high_points,
support=distance_support,
hull=study_area_poly,
keep_simulations=True,
n_simulations=iterations
)
obs_high = high_result.statistic
sims_high = high_result.simulations
r_high = high_result.support
lower_high = np.percentile(sims_high, 2.5, axis=0)
upper_high = np.percentile(sims_high, 97.5, axis=0)
plt.figure(figsize=(10,6))
plt.plot(r_high, obs_high, color='blue', label='Observed K (High Income Areas)')
plt.fill_between(r_high, lower_high, upper_high, color='gray', alpha=0.5, label='95% CI (Random)')
plt.xlabel('Distance (m)')
plt.ylabel('K(d)')
plt.title("Ripley's K - High Income Areas")
plt.legend()
plt.show()
############################################
# 输出结果检查
############################################
print("Low-income areas K-test results:")
print("Attributes:", dir(low_result))
print("Observations shape:", obs_low.shape, "Simulations shape:", sims_low.shape)
print("\nHigh-income areas K-test results:")
print("Attributes:", dir(high_result))
print("Observations shape:", obs_high.shape, "Simulations shape:", sims_high.shape)

Low-income areas K-test results:
Attributes: ['__add__', '__class__', '__class_getitem__', '__contains__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getnewargs__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__match_args__', '__module__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '_asdict', '_field_defaults', '_fields', '_make', '_replace', 'count', 'index', 'pvalue', 'simulations', 'statistic', 'support']
Observations shape: (10,) Simulations shape: (999, 10)
High-income areas K-test results:
Attributes: ['__add__', '__class__', '__class_getitem__', '__contains__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getnewargs__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__match_args__', '__module__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '_asdict', '_field_defaults', '_fields', '_make', '_replace', 'count', 'index', 'pvalue', 'simulations', 'statistic', 'support']
Observations shape: (10,) Simulations shape: (999, 10)
import numpy as np
import matplotlib.pyplot as plt
# 从low_result和high_result中获取数据
obs_low = low_result.statistic
sims_low = low_result.simulations
obs_high = high_result.statistic
sims_high = high_result.simulations
r = low_result.support # 距离序列应当相同
# 计算模拟分布均值
mean_low = np.mean(sims_low, axis=0)
mean_high = np.mean(sims_high, axis=0)
# 归一化处理:观测值除以模拟均值
norm_low = obs_low / mean_low
norm_high = obs_high / mean_high
plt.figure(figsize=(10,6))
plt.plot(r, norm_low, color='red', label='Low Income Normalized')
plt.plot(r, norm_high, color='blue', label='High Income Normalized')
plt.axhline(y=1.0, color='gray', linestyle='--', label='Random Baseline')
plt.xlabel('Distance (m)')
plt.ylabel('Observed K / Mean Random K')
plt.title("Normalized K Function Comparison")
plt.legend()
plt.show()